GPS loggers differ between successful and failed breeders, so the two datasets have to be processed differently before being merged. Moreover, data from 2015 and 2018 have different formats and different constraints, so they are also processed separately. This file formats the 2015 data and compares the results obtained when trips are cleaned with different threshold parameters.

Bird data import for 2015

Import data for successful breeders

Loggers deployed on successful breeders are IgotU GPS. They give timestamps in LOCAL TIME (GMT+5), so no time shift is applied, and they record a location every 5 minutes.

library(fields)
library(maptools)
library(maps)
library(stringr)
library(xlsx)
library(plotrix)
library(pastecs)
library(kableExtra)
library(plyr)
library(lme4)
library(afex)
library(simr)
source("R scripts/Functions.R")

# Colony position (longitude / latitude), the reference point for every
# distance-to-colony calculation in this script
colony <- data.frame(
  Longitude = 77.52356,
  Latitude = -37.8545
)

# Cut-off timestamp: failed-breeder fixes at or after it are dropped further down
date.max <- as.POSIXct("2015-12-22 12:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")

## Successful breeders (classical GPS): read the raw export and build a timestamp
success.2015 <- read.table(
  "Data/RawData/ams_yna_2015-16_gps_success.txt",
  header = TRUE,
  sep = "\t"
)
success.2015$Status <- factor("success")

# Dates are written day/month/year. They are combined with the logger's time
# column and stored with a UTC label; no hour offset is applied to this logger.
success.day <- strptime(success.2015$Date, format = "%d/%m/%Y")
success.stamp <- paste(success.day, success.2015$Time, sep = " ")
success.2015$DateTime <- as.POSIXct(success.stamp, tz = "UTC")

# Keep only the columns shared with the failed-breeder dataset
success.cols <- c("Logger.ID", "Status", "DateTime", "Longitude", "Latitude", "Speed", "Altitude")
success.2015 <- success.2015[, success.cols]

# Great-circle distance (km, because miles = FALSE) from every fix to the colony
success.xy <- success.2015[, c("Longitude", "Latitude")]
colony.xy <- colony[1, c("Longitude", "Latitude")]
success.2015$Distmax <- as.vector(rdist.earth(success.xy, colony.xy, miles = FALSE))

# The first column (logger identifier) is used as the bird identifier
names(success.2015)[1] <- "ID"

# Placeholders for step length and elapsed time; they are filled in right after
success.2015$Difftime <- 0 -> success.2015$Distadj

# Step length (km) and elapsed time (minutes) between each fix and the row
# just before it. The first fix of the table has no predecessor and stays at 0.
n.success <- nrow(success.2015)
success.lonlat <- as.matrix(success.2015[, c("Longitude", "Latitude")])
success.step.km <- numeric(n.success)
success.step.min <- numeric(n.success)

# seq_len(n)[-1] is empty when there are fewer than 2 fixes (2:nrow() would
# count backwards and fail). Distances keep using rdist.earth() pair by pair
# so the values are the same as before; results are collected in a plain
# vector instead of rewriting a data-frame column at every iteration.
for (n in seq_len(n.success)[-1]) {
  success.step.km[n] <- rdist.earth(success.lonlat[n, , drop = FALSE],
                                    success.lonlat[n - 1, , drop = FALSE],
                                    miles = FALSE)
}
if (n.success > 1) {
  success.step.min[-1] <- as.numeric(difftime(success.2015$DateTime[-1],
                                              success.2015$DateTime[-n.success],
                                              units = "mins"))
}

# Remove steps that straddle two individuals. The boundary is detected from
# the ID column itself: relying on a negative time difference misses every
# boundary where the next bird's track starts after the previous bird's ends.
# The step distance at a boundary is meaningless as well, so it is reset too.
success.bird <- as.character(success.2015$ID)
success.new.bird <- which(success.bird[-1] != success.bird[-n.success]) + 1
success.step.km[success.new.bird] <- 0
success.step.min[success.new.bird] <- 0

# Any remaining negative time difference (rows out of order) is still zeroed
success.step.min[which(success.step.min < 0)] <- 0

success.2015$Distadj <- success.step.km
success.2015$Difftime <- success.step.min

Import data for failed breeders

Loggers deployed on failed breeders are Ecotone GPS-UHF. They give time in GMT, so 5 h have to be added to obtain local time (local time = GMT+5). They record locations every 5-15 minutes, depending on weather conditions, and they produce more gaps in the data.

## Failed breeders (GPS-UHF): read the raw export and build a shifted timestamp
fail.2015 <- read.table(
  "Data/RawData/ams_yna_2015-16_gps_fail.txt",
  header = TRUE,
  sep = "\t"
)

# Date and time are stored in separate numeric columns; glue them together
fail.2015$Date <- with(fail.2015, paste(Year, Month, Day, sep = "/"))
fail.2015$Time <- with(fail.2015, paste(Hour, Minute, Second, sep = ":"))
fail.2015$Status <- factor("fail")

# Parse the logger time, then add 5 hours so that it lines up with the
# timestamps of the successful breeders
fail.day <- strptime(fail.2015$Date, format = "%Y/%m/%d")
fail.logger.time <- as.POSIXct(paste(fail.day, fail.2015$Time, sep = " "), tz = "UTC")
fail.2015$DateTime <- fail.logger.time + 5 * 3600

# Same columns as the successful breeders, plus the In.range flag needed below
fail.cols <- c("Logger.ID", "Status", "DateTime", "Longitude", "Latitude",
               "Speed", "Altitude", "In.range")
fail.2015 <- fail.2015[, fail.cols]

# Rows flagged In.range == 1 that have no coordinate are placed at the colony
lon.missing.in.range <- which(is.na(fail.2015$Longitude) & fail.2015$In.range == 1)
fail.2015$Longitude[lon.missing.in.range] <- colony$Longitude

lat.missing.in.range <- which(is.na(fail.2015$Latitude) & fail.2015$In.range == 1)
fail.2015$Latitude[lat.missing.in.range] <- colony$Latitude

# Rows that still have no latitude cannot be used and are dropped
has.latitude <- !is.na(fail.2015$Latitude)
fail.2015 <- fail.2015[has.latitude, ]

# Great-circle distance (km, because miles = FALSE) from every fix to the colony
fail.xy <- fail.2015[, c("Longitude", "Latitude")]
colony.xy <- colony[1, c("Longitude", "Latitude")]
fail.2015$Distmax <- as.vector(rdist.earth(fail.xy, colony.xy, miles = FALSE))

# Rows flagged In.range == 1 are treated as being at the colony
fail.2015$Distmax[fail.2015$In.range == 1] <- 0

# The first column (logger identifier) is used as the bird identifier
names(fail.2015)[1] <- "ID"

# In.range (8th column) is no longer needed once the distances are set
fail.2015 <- fail.2015[, names(fail.2015) != "In.range"]

# Placeholder for the step length, filled in right after
fail.2015$Distadj <- 0

# Step length (km) and elapsed time (minutes) between each fix and the row
# just before it. The first fix of the table has no predecessor and stays at 0.
n.fail <- nrow(fail.2015)
fail.lonlat <- as.matrix(fail.2015[, c("Longitude", "Latitude")])
fail.step.km <- numeric(n.fail)
fail.step.min <- numeric(n.fail)

# seq_len(n)[-1] is empty when there are fewer than 2 fixes (2:nrow() would
# count backwards and fail). Distances keep using rdist.earth() pair by pair
# so the values are the same as before; results are collected in a plain
# vector instead of rewriting a data-frame column at every iteration.
for (n in seq_len(n.fail)[-1]) {
  fail.step.km[n] <- rdist.earth(fail.lonlat[n, , drop = FALSE],
                                 fail.lonlat[n - 1, , drop = FALSE],
                                 miles = FALSE)
}
if (n.fail > 1) {
  fail.step.min[-1] <- as.numeric(difftime(fail.2015$DateTime[-1],
                                           fail.2015$DateTime[-n.fail],
                                           units = "mins"))
}

# Remove steps that straddle two individuals. The boundary is detected from
# the ID column itself: relying on a negative time difference misses every
# boundary where the next bird's track starts after the previous bird's ends.
# The step distance at a boundary is meaningless as well, so it is reset too.
fail.bird <- as.character(fail.2015$ID)
fail.new.bird <- which(fail.bird[-1] != fail.bird[-n.fail]) + 1
fail.step.km[fail.new.bird] <- 0
fail.step.min[fail.new.bird] <- 0

# Any remaining negative time difference (rows out of order) is still zeroed
fail.step.min[which(fail.step.min < 0)] <- 0

fail.2015$Distadj <- fail.step.km
fail.2015$Difftime <- fail.step.min

# Keep only the fixes recorded strictly before the cut-off date
fail.2015 <- subset(fail.2015, fail.2015$DateTime < date.max)

Merge data 2015, define trips and clean trips

Parameters to define a trip are based on distance to the colony, number of consecutive locations out of the colony and total duration of the trip.

# Stack successful and failed breeders into a single table
dataset.2015 <- rbind(success.2015, fail.2015)

# Remove duplicated fixes. A fix is a duplicate only when the SAME bird already
# has a row with the same timestamp: testing DateTime alone would also delete
# valid fixes from different birds that happen to be recorded at the same time.
fix.key <- paste(dataset.2015$ID, as.numeric(dataset.2015$DateTime), sep = "_")
dataset.2015 <- dataset.2015[!duplicated(fix.key), ]

# Placeholders for the trip definition step.
# TravelNb is deliberately the character string "NA", not a missing value.
dataset.2015$TravelNb <- "NA"
dataset.2015$PathLength <- 0

## Thresholds used to define a trip
## NOTE(review): the previous header said "in 2018", but they are set here just
## before the 2015 trips are defined -- presumably read as globals by
## define_trips(); confirm in the sourced functions file.

# distance threshold (km)
dist.thres <- 1
# distance threshold (km) when there is only one location between 2 trips
last.dist <- 10
# number of rows constituting a trip
row.thres <- 12
# minimum duration of a trip (unit not stated here; presumably minutes -- confirm)
dur.thres <- 120

# Number the trips within each individual (define_trips() comes from the sourced functions file)
dataset.2015 <- define_trips(dataset.2015)

# Trip identifier that is unique across birds: "<ID>.<TravelNb>"
dataset.2015$TravelID <- with(dataset.2015, paste(ID, TravelNb, sep = "."))

# Keep a copy of the raw (uncleaned) trips on disk
save(dataset.2015, file = "Data/NewlyCreatedData/raw_trips_2015.RData")

# Summary of the raw trips by status: no time-gap threshold is applied here,
# only total duration and distance (function from the sourced functions file).
# NOTE(review): the object name says 2018 but it is built from the 2015 data;
# the name is kept because later code may refer to it.
dist.max.all.trips.2018.status <- raw_trips_summary_status(dataset.2015)

# Summaries of the cleaned trips for a range of maximum time gaps.
# diff.thres is the gap threshold in minutes (hours * 60); it is re-assigned
# before each call to clean_trips_summary_status() (sourced functions file).

# gaps up to 10 h
diff.thres <- 10 * 60
trips10h <- clean_trips_summary_status(dataset.2015, diff.thres)

# gaps up to 8 h
diff.thres <- 8 * 60
trips8h <- clean_trips_summary_status(dataset.2015, diff.thres)

# gaps up to 6 h
diff.thres <- 6 * 60
trips6h <- clean_trips_summary_status(dataset.2015, diff.thres)

# gaps up to 5 h
diff.thres <- 5 * 60
trips5h <- clean_trips_summary_status(dataset.2015, diff.thres)

# gaps up to 4 h
diff.thres <- 4 * 60
trips4h <- clean_trips_summary_status(dataset.2015, diff.thres)

# gaps up to 3 h
diff.thres <- 3 * 60
trips3h <- clean_trips_summary_status(dataset.2015, diff.thres)

Summary of raw and selected trips by status

The first table shows the summary of trip characteristics with raw data (without any time gap threshold). The others show the summary of trip characteristics with different time gap thresholds.

Summary of raw trips
Status NbInd NbTravel MeanDist SDDist MinDist MaxDist MeanDur SDDur MinTripDur MaxTripDur MinmaxDiff MaxmaxDiff MeanTotPath MinTotPath MaxTotPath
success 8 24 340.4639 39.92386 155.250865 852.6335 52.79950 6.405953 17.417778 171.0675 0.1633333 0.6908333 1098.5590 407.3176 3205.429
fail 13 30 459.7640 70.51314 0.234768 1229.8146 76.66317 10.809942 3.783333 211.5833 0.2000000 57.9500000 742.4454 0.0000 2312.912
Summary of trips with gaps <10h
Status NbInd NbTravel MeanDist SDDist MinDist MaxDist MeanDur SDDur MinTripDur MaxTripDur MinmaxDiff MaxmaxDiff MeanTotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 0.1633333 0.6908333 1106.5136 407.31755 3205.429
fail 12 23 416.3060 80.72137 28.6290 1229.8146 58.62743 10.668947 3.783333 163.0244 0.2000000 5.3666667 715.2378 58.46149 2312.912
Summary of trips with gaps <8h
Status NbInd NbTravel MeanDist SDDist MinDist MaxDist MeanDur SDDur MinTripDur MaxTripDur MinmaxDiff MaxmaxDiff MeanTotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 0.1633333 0.6908333 1106.5136 407.31755 3205.429
fail 12 23 416.3060 80.72137 28.6290 1229.8146 58.62743 10.668947 3.783333 163.0244 0.2000000 5.3666667 715.2378 58.46149 2312.912
Summary of trips with gaps <6h
Status NbInd NbTravel MeanDist SDDist MinDist MaxDist MeanDur SDDur MinTripDur MaxTripDur MinmaxDiff MaxmaxDiff MeanTotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 0.1633333 0.6908333 1106.5136 407.31755 3205.429
fail 12 23 416.3060 80.72137 28.6290 1229.8146 58.62743 10.668947 3.783333 163.0244 0.2000000 5.3666667 715.2378 58.46149 2312.912
Summary of trips with gaps <5h
Status NbInd NbTravel MeanDist SDDist MinDist MaxDist MeanDur SDDur MinTripDur MaxTripDur MinmaxDiff MaxmaxDiff MeanTotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 0.1633333 0.6908333 1106.5136 407.31755 3205.429
fail 10 20 372.8003 84.22744 28.6290 1229.8146 47.87890 9.562511 3.783333 137.9333 0.2000000 4.8500000 693.4722 58.46149 2312.912
Summary of trips with gaps <4h
Status NbInd NbTravel MeanDist SDDist MinDist MaxDist MeanDur SDDur MinTripDur MaxTripDur MinmaxDiff MaxmaxDiff MeanTotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 0.1633333 0.6908333 1106.5136 407.31755 3205.429
fail 9 18 337.8395 85.29881 28.6290 1229.8146 44.99270 10.110656 3.783333 137.9333 0.2000000 3.4977778 668.5404 58.46149 2312.912
Summary of trips with gaps <3h
Status NbInd NbTravel MeanDist SDDist MinDist MaxDist MeanDur SDDur MinTripDur MaxTripDur MinmaxDiff MaxmaxDiff MeanTotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 0.1633333 0.6908333 1106.5136 407.31755 3205.429
fail 8 16 237.5070 56.30552 28.6290 826.0564 33.54927 7.193742 3.783333 102.1197 0.2000000 2.6166667 517.7086 58.46149 1443.117

Plots of trips by individuals

In the plots, all black dots represent trips/data that have been excluded. All colored trips are the selected ones.

Interpolation of clean trips

The proportions of time gaps longer than 16, 20 and 30 min are calculated to choose the best interpolation.

# Locations of the trips kept with a 5 h maximum time gap (threshold in minutes);
# clean_trips_locations() comes from the sourced functions file
diff.thres <- 5 * 60
clean.trips.loc.2015 <- clean_trips_locations(dataset.2015, diff.thres)

# Trip identifier that is unique across birds: "<ID>.<TravelNb>"
clean.trips.loc.2015$TravelID <- with(clean.trips.loc.2015, paste(ID, TravelNb, sep = "."))


# Percentage of locations whose time gap to the previous location exceeds a
# given number of minutes (rows with a missing Difftime are not counted as gaps)
n.clean.loc <- nrow(clean.trips.loc.2015)

# Percentage of gaps > 16 min
sum(clean.trips.loc.2015$Difftime > 16, na.rm = TRUE) / n.clean.loc * 100
## [1] 2.164736
# Percentage of gaps > 20 min
sum(clean.trips.loc.2015$Difftime > 20, na.rm = TRUE) / n.clean.loc * 100
## [1] 1.958571
# Percentage of gaps > 30 min
sum(clean.trips.loc.2015$Difftime > 30, na.rm = TRUE) / n.clean.loc * 100
## [1] 1.266444
## Interpolation
## Every clean trip is resampled on a regular time grid by linear interpolation
## of longitude and latitude (regul() from pastecs). Distance to the colony,
## time step and step length are then recomputed on the interpolated fixes.
reso <- 15 * 60       # time resolution between 2 consecutive points, in seconds

new.trip <- NULL
loc.interpolated.2015 <- NULL
id <- unique(clean.trips.loc.2015$ID)

# Interpolated trips are collected here and bound once after the loops,
# instead of growing new.trip with rbind() at every iteration
trip.pieces <- list()

# Interpolate data for each individual separately.
# seq_along()/seq_len() are used throughout so that an empty set of birds or
# a single-row trip is skipped instead of being indexed with 1:0 or 2:1.
for (x in seq_along(id)) {
  sub <- subset(clean.trips.loc.2015, clean.trips.loc.2015$ID == id[x])
  trip <- unique(sub$TravelNb)

  # Interpolate data for each trip separately
  for (y in seq_along(trip)) {
    sub.trip <- subset(sub, sub$TravelNb == trip[y])

    # Seconds elapsed since the first fix of the trip
    sub.trip$TimeSinceOrigin <- as.numeric(difftime(sub.trip$DateTime,
                                                    sub.trip$DateTime[1],
                                                    units = "secs"))

    # Number of regular points requested from regul().
    # NOTE(review): with xmin = 0 the grid ends at (n - 1) * reso, so rounding
    # duration / reso may leave out the last interval of the trip -- confirm
    # that this is intended before changing it.
    n.points <- round(max(sub.trip$TimeSinceOrigin) / reso, 0)

    # Resampling of long and lat with regular time intervals
    sub.lat1 <- regul(x = sub.trip$TimeSinceOrigin, y = sub.trip$Latitude,
                      n = n.points, deltat = reso, methods = "linear",
                      xmin = 0, units = "sec")
    sub.lon1 <- regul(x = sub.trip$TimeSinceOrigin, y = sub.trip$Longitude,
                      n = n.points, deltat = reso, methods = "linear",
                      xmin = 0, units = "sec")

    # First element of the regul object: regular times; second: resampled series
    regular.time <- as.vector(sub.lon1[[1]])
    new.sub <- data.frame(Longitude = sub.lon1[[2]]$Series,
                          Latitude = sub.lat1[[2]]$Series,
                          Time = regular.time,
                          DateTime = sub.trip$DateTime[1] + regular.time,
                          TravelNb = trip[y])

    # Recreate a dataframe with individual and trip characteristics
    n.new <- nrow(new.sub)
    new.sub$ID <- id[x]
    new.sub$Status <- rep(unique(sub$Status), n.new)
    new.sub$Distmax <- as.vector(rdist.earth(new.sub[, c("Longitude", "Latitude")],
                                             colony[1, c("Longitude", "Latitude")],
                                             miles = FALSE))

    # Minutes since the previous interpolated fix (0 for the first one).
    # Negative indexing replaces `1:nrow(new.sub)-1`, which only worked because
    # it evaluates to 0:(n - 1) and the zero index is silently dropped.
    new.sub$Difftime <- c(0, difftime(new.sub$DateTime[-1],
                                      new.sub$DateTime[-n.new],
                                      units = "mins"))
    new.sub$Distadj <- 0
    new.sub$TravelID <- paste(new.sub$ID, new.sub$TravelNb, sep = ".")

    # Step length (km) between consecutive interpolated fixes
    for (z in seq_len(n.new)[-1]) {
      new.sub$Distadj[z] <- rdist.earth(new.sub[z, c("Longitude", "Latitude")],
                                        new.sub[z - 1, c("Longitude", "Latitude")],
                                        miles = FALSE)
    }

    trip.pieces[[length(trip.pieces) + 1]] <- new.sub
  }
}

# new.trip stays NULL when no trip was interpolated, as before
if (length(trip.pieces) > 0) {
  new.trip <- do.call(rbind, trip.pieces)
}

# Final interpolated dataset: keep the analysis columns, in a fixed order
interp.cols <- c("ID", "Status", "TravelNb", "TravelID", "DateTime",
                 "Longitude", "Latitude", "Distmax", "Difftime", "Distadj")
loc.interpolated.2015 <- new.trip[, interp.cols]

# Store the interpolated locations on disk
save(loc.interpolated.2015, file = "Data/NewlyCreatedData/clean_interp_loc_2015.RData")

Statistics

Linear mixed models are applied to the raw trips to test whether the trip variables differ between failed and successful breeders.

# One row of trip characteristics per raw trip (function from the sourced functions file)
raw.trips.2015 <- raw_trips_summary_ind(dataset.2015)

# One mixed model per trip variable: square-root transformed response,
# breeding status as fixed effect, random intercept for the individual (ID)
maxdist <- lmer(sqrt(Distmax) ~ Status + (1 | ID), data = raw.trips.2015)
dur     <- lmer(sqrt(TripDur) ~ Status + (1 | ID), data = raw.trips.2015)
totdist <- lmer(sqrt(TotalPath) ~ Status + (1 | ID), data = raw.trips.2015)

# Test of the Status effect on maximum distance to the colony
anova(maxdist)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
## Status 44.664  44.664     1 17.763  1.1189 0.3043
# Test of the Status effect on trip duration
anova(dur)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
## Status 9.1888  9.1888     1 15.809  1.1464 0.3004
# Test of the Status effect on total path length
anova(totdist)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)  
## Status 448.09  448.09     1 16.724  4.5292 0.0485 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Power analysis based on Simr package